function sample_data = echoviewParse( filename )
%ECHOVIEWPARSE Parses EchoView results CSV file.
%
% This is an early draft attempt to parse an echoview results
% comma separated values (CSV) file into a sample_data struct.
%
% This function is almost a generic CSV Parser.
% The list of fields to define and the column names that populate those
% fields are defined in a text file './Parser/echoview_config.txt' or the
% file specified by the property 'echoview.config' if defined.
%
% Config entries with an empty dimension list are treated as dimensions:
% every distinct value found in their column becomes one coordinate value.
% Entries with a dimension list are treated as variables: each row's value
% is stored at the position given by that row's dimension values.
% Dimensions (and variables) that end up with a single value are stored
% as plain fields of sample_data instead.
%
% Limitations:
%   Text quoting is not properly supported (fields cannot contain ,)
%   Dates are only supported in the format yyyymmdd
%   Times are only supported in the format HH:MM:SS.ss
%   Timestamps are only supported as a date field followed by a time field
%   yyyymmdd, HH:MM:SS.ss
%   Text (type 'S') variables are not supported, only text dimensions
%
% Inputs:
%   filename    - Cell array of strings; only the first entry is read.
%                 CSV results file generated by echoview.
%
% Outputs:
%   sample_data - Struct containing sample data.
%                 sample_data.dimensions{k} has fields name and data,
%                 sample_data.variables{k} has fields name, dimensions
%                 (indices into sample_data.dimensions) and data.
%
% Externals:
%   Property 'echoview.config' and the file it specifies or
%   the file './Parser/echoview_config.txt'
%
% Author: Gordon Keith <gordon.keith@csiro.au>
%

% Copyright (c) 2010, CSIRO.
% This source is based on and uses code fragments from code that is:
%
% Copyright (c) 2009, eMarine Information Infrastructure (eMII) and Integrated 
% Marine Observing System (IMOS).
% All rights reserved.
% 
% Redistribution and use in source and binary forms, with or without 
% modification, are permitted provided that the following conditions are met:
% 
%     * Redistributions of source code must retain the above copyright notice, 
%       this list of conditions and the following disclaimer.
%     * Redistributions in binary form must reproduce the above copyright 
%       notice, this list of conditions and the following disclaimer in the 
%       documentation and/or other materials provided with the distribution.
%     * Neither the name of the eMII/IMOS nor the names of its contributors 
%       may be used to endorse or promote products derived from this software 
%       without specific prior written permission.
% 
% THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
% AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
% IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
% ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
% LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
% CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
% SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
% INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
% CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
% ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
% POSSIBILITY OF SUCH DAMAGE.
%
  error(nargchk(1,1,nargin));
  if ~iscellstr(filename), error('filename must be a cell array of strings'); end

  % only one file supported
  filename = filename{1};

  % the file name and the names of the two directories above it are
  % recorded as metadata (.../<vessel>/<voyage>/<file>)
  [parent_dir, fname]  = fileparts(filename);
  [parent_dir, voyage] = fileparts(parent_dir);
  [~, vessel]          = fileparts(parent_dir);
  
  sample_data.meta.instrument_make = 'EchoView';
  sample_data.meta.instrument_model = vessel;
  sample_data.meta.instrument_serial_no = voyage;
  sample_data.EV_csv_file = fname;
  
  % make sure both lists exist even if the config defines none of a kind
  sample_data.dimensions = {};
  sample_data.variables  = {};
  
  map = getFieldMap();
  
  % open the file, and read in the header and data
  fid = -1;
  try 
    
    fid = fopen(filename, 'rt');
    if fid == -1
        error(['Could not open file ' filename]);
    end
    
    %
    % read in and parse header
    %
    header = fgetl(fid);
    if ~ischar(header)
        error(['File is empty: ' filename]);
    end
    % replace any non-ASCII characters with spaces before matching names
    header(header > 127) = 32;
    map = findColumns(map, header);
    
    % extract dimensions (config entries without a dimension list)
    for k = 1:length(map)
       if (map(k).column > 0) && isempty(map(k).dimension) 
           dim_entry = struct();
           dim_entry.name   = map(k).name;
           dim_entry.column = map(k).column;
           dim_entry.type   = map(k).type;
           if strcmp(map(k).type, 'S')
               dim_entry.data = '';
           else
               dim_entry.data = [ ];
           end
           sample_data.dimensions{end + 1} = dim_entry;
       end
    end
    
    % extract variables (config entries with a dimension list)
    for k = 1:length(map)
       if (map(k).column > 0) && ~ isempty(map(k).dimension)
           var_entry = struct();
           var_entry.name       = map(k).name;
           var_entry.column     = map(k).column;
           var_entry.type       = map(k).type;
           var_entry.dimensions = [];
           if strcmp(map(k).type, 'S')
               var_entry.data = '';
           else
               var_entry.data = [ ];
           end
           
           % translate the whitespace separated dimension names into
           % indices into sample_data.dimensions, keeping the listed order
           dim_names = regexp(map(k).dimension, '\s+', 'split');
           for d = 1:length(sample_data.dimensions)
               for dd = 1:length(dim_names)
                 if strcmp(sample_data.dimensions{d}.name, dim_names{dd})
                    var_entry.dimensions(dd) = d;
                 end
               end
           end
           sample_data.variables{end + 1} = var_entry;
       end
    end
    
    % index(k) is the position of the current row's value within
    % dimension k
    index = zeros(1,length(sample_data.dimensions));
    
    %
    % read in and parse each line
    %
    text_line = fgetl(fid);
    while ischar(text_line)
      fields = getCSVfields(text_line);
      if length(fields) < 4
          % too few fields to be a data row
          text_line = fgetl(fid);
          continue;
      end
      
      % read dimensions
      for k = 1:length(sample_data.dimensions)
          column = sample_data.dimensions{k}.column ;
          type = sample_data.dimensions{k}.type;
          value = getValue(fields, column, type);
          known = sample_data.dimensions{k}.data;

          % find value in existing data
          index(k) = 0;
          if strcmp(type, 'S')
              % text string: values are kept as rows of a char matrix, 
              % blank padded on the right to a common width.
              % Pad the new value the same way so rows compare exactly
              % (an empty value is stored as a single blank).
              key = value;
              width = max(size(known, 2), 1);
              if length(key) < width
                  key(end + 1:width) = ' ';
              end
              for d = 1:size(known, 1)
                  if strcmp(key, known(d,:))
                      index(k) = d;
                      break;
                  end
              end
              if index(k) == 0
                  % new value
                  if isempty(known)
                      known = key;
                  else
                      % char pads the shorter rows with blanks
                      known = char(known, key);
                  end
                  index(k) = size(known, 1);
                  sample_data.dimensions{k}.data = known;
              end
          else
              % numeric scalar
              d = find(known == value, 1);
              if isempty(d)
                  % new value
                  d = length(known) + 1;
                  sample_data.dimensions{k}.data(d) = value;
              end
              index(k) = d;
          end
          
      end
      
      % read variables
      for k = 1:length(sample_data.variables)
          column = sample_data.variables{k}.column ;
          type = sample_data.variables{k}.type;
          var_dims = sample_data.variables{k}.dimensions;
          value = getValue(fields, column, type);

          %TODO doesn't handle strings
          if ~isempty(var_dims)
              % one subscript per dimension of the variable
              subs = num2cell(index(var_dims));
              sample_data.variables{k}.data(subs{:}) = value;
          end
      end
      
      % get the next line
      text_line = fgetl(fid);
        
    end
    
    fclose(fid);
  
  catch e
    if fid ~= -1, fclose(fid); end
    rethrow(e);
  end
  
  %
  % Convert singletons to globals.
  %
  remove = [];
  remove_var = [];
  for k = 1:length(sample_data.dimensions)
      dimension = sample_data.dimensions{k};
      if strcmp(dimension.type, 'S')
          if size(dimension.data,1) == 1
              sample_data.(dimension.name) = char(dimension.data);
              remove(end + 1) = k;
          end
      else
          if length(dimension.data) == 1
              sample_data.(dimension.name) = dimension.data;
              remove(end + 1) = k;
          end
      end
  end
  % remove singleton dimensions from variables and renumber remaining 
  % dimensions. Only the index list is updated here; the data array itself
  % is not reshaped.
  for k = 1:length(sample_data.variables)
      dd = sample_data.variables{k}.dimensions;
      dd(ismember(dd,remove)) = [];
      if isempty(dd)
          % variable is also a singleton
          if strcmp(sample_data.variables{k}.type, 'S')
              sample_data.(sample_data.variables{k}.name) = ...
                  char(sample_data.variables{k}.data);
          else
              sample_data.(sample_data.variables{k}.name) = ...
                  sample_data.variables{k}.data;
          end
          remove_var(end + 1) = k;
      else
          % shift each index down by the number of removed dimensions
          % that preceded it
          for ddd = 1:length(dd)
              dd(ddd) = dd(ddd) - sum(dd(ddd) > remove);
          end
          sample_data.variables{k}.dimensions = dd;
      end
  end
  sample_data.dimensions(remove) = [];
  sample_data.variables(remove_var) = [];
  
  %
  % remove column and type, which are only needed while parsing
  %
  for k = 1:length(sample_data.dimensions)
      sample_data.dimensions{k} = ...
          rmfield(sample_data.dimensions{k}, {'column', 'type'});
  end
  for k = 1:length(sample_data.variables)
      sample_data.variables{k} = ...
          rmfield(sample_data.variables{k}, {'column', 'type'});
  end

end

function field_map = getFieldMap()
%GETFIELDMAP returns a map of Variables read from the config file.
%
% The config file is the one named by the property 'echoview.config';
% if that property cannot be read, is empty, or names a file that does
% not exist, <current directory>/Parser/echoview_config.txt is used.
%
% Each usable config line has at least four comma separated fields:
% - the variable name as it will appear in sample_data, 
% - the variable's column name as it will appear in the CSV file,
% - the variable's dimension(s), 
% - the variable's type.
% Lines with fewer than four fields and lines whose first non-blank
% character is '#' or '%' are ignored.
%
% Outputs:
%   field_map - struct array with fields name, column_name, dimension and
%               type, one element per usable config line (empty if the
%               file has none).
%
% Raises an error if the config file cannot be opened.
%

  % get path to config file
  try
      config_file = readProperty('echoview.config');
  catch
      % property not available, fall back to the default below
      config_file = '';
  end
  if isempty(config_file) || ~exist(config_file, 'file')
    config_file = fullfile(pwd, 'Parser', 'echoview_config.txt');
  end
  
  % start from an empty struct array so that a config without usable
  % lines gives a zero length map
  field_map = struct('name', {}, 'column_name', {}, ...
                     'dimension', {}, 'type', {});
  
  fid = fopen(config_file, 'rt');
  if fid == -1
      error(['Could not open EchoView config file ' config_file]);
  end
  
  try
    k = 1;
    
    % read in and parse each line
    text_line = fgetl(fid);
    while ischar(text_line)
      
      trimmed = strtrim(text_line);
      is_comment = ~isempty(trimmed) && ...
                   (trimmed(1) == '#' || trimmed(1) == '%');
      fields = strtrim(regexp(text_line, ',', 'split'));
      
      if ~is_comment && length(fields) >= 4
          field_map(k).name        = fields{1};
          field_map(k).column_name = fields{2};
          field_map(k).dimension   = fields{3};
          field_map(k).type        = fields{4};
          k = k + 1;
      end
      
      % get the next line
      text_line = fgetl(fid);
    end
    
    fclose(fid);
  catch e
    fclose(fid);
    rethrow(e);
  end

end

function field_map = findColumns(field_map, line)
%FINDCOLUMNS locate each mapped column in a CSV header line.
%
% The header line is split into column names and, for every entry of
% field_map, the position of the column whose name equals the entry's
% column_name is stored in the entry's column field. If the name occurs
% more than once the last occurrence is used.
%
% An error is raised for the first entry whose column name is not present
% in the header.

    header_names = getCSVfields(line);
    
    for entry = 1:length(field_map)
        wanted = field_map(entry).column_name;
        
        % positions of all header columns carrying the wanted name
        hits = find(strcmp(header_names, wanted));
        
        if isempty(hits)
            error(['Could not locate column ' field_map(entry).column_name]);
        end
        
        field_map(entry).column = hits(end);
    end

end

function value = getValue(fields, column, type)
%GETVALUE get a value from the indicated column of the specified type
%
% Inputs:
%   fields - cell array of strings, the fields of one CSV line.
%   column - index into fields of the value to convert.
%   type   - type code of the value:
%            'S'  text, returned as a string with one pair of enclosing
%                 double quotes removed if present
%            'N'  number, converted with str2double
%            'D'  date in the format yyyymmdd, converted with datenum
%            'T'  time in the format HH:MM:SS.ss, converted with datenum
%            'DT' date in this column followed by time in the next
%                 column, converted with datenum
%            any other code returns the field unchanged.
%
% Outputs:
%   value  - string for text, numeric scalar for the other known types.
%
% TODO current support for date and time is only yyyymmdd, HH:MM:SS.ss
% it would be nice to support more date/time formats

    value = fields{column};
    if strcmp(type, 'S')
        % the emptiness check keeps value(1) from failing on blank fields
        if ~isempty(value) && value(1) == '"' && value(end) == '"'
            value([1 end]) = '';
        end
    elseif strcmp(type, 'N')
        value = str2double(value);
    elseif strcmp(type, 'D')
        value = datenum(value, 'yyyymmdd');
    elseif strcmp(type, 'T')
        % a '0' is appended so two fractional digits match the FFF format
        value = datenum([value '0'], 'HH:MM:SS.FFF');
    elseif strcmp(type, 'DT')
        % date field followed by time field, '0' appended as for 'T'
        value = datenum([value fields{column + 1} '0'], ...
                        'yyyymmddHH:MM:SS.FFF');
    end
end

function fields = getCSVfields(line)
%GETCSVFIELDS split one line of a comma separated values file into a cell
% array of string fields.
%
% The line is cut at every comma, all double quote characters are removed
% from each piece and surrounding whitespace is trimmed.
%
% Quoted fields are not interpreted: a comma inside quotes still separates
% fields.
%
% TODO handle text fields that contain commas

    pieces   = regexp(line, ',', 'split');
    unquoted = regexprep(pieces, '"', '');
    fields   = strtrim(unquoted);
end

